2021 VIS Area Curation Committee Executive Summary

Summary

We use submission and bidding information from VIS 2021 to analyze the impact of moving to an area model. Given the information we have access to, the move appears to be broadly successful, and we only make small recommendations on example descriptions of areas, and keywords to change. Our analysis suggests that submissions are relatively balanced across areas, keywords are (with a small exception) well distributed, and the unified PC appears to provide broad and overlapping coverage.

Committee members: Alex Endert (chair), Steven Drucker, Issei Fujishiro, Christoph Garth, Heidi Lam, Heike Leitte, Carlos Scheidegger, Hendrik Strobelt, Penny Rheingans.

Last edited: 2021-08-23.

Code

# imports

import itertools

import pandas as pd
import numpy as np

import plotly.io as pio
import plotly.graph_objs as go
import plotly.express as px

# "notebook_connected" renders figures inline with plotly.js loaded from a
# CDN, which keeps the exported notebook HTML small (requires a connection)
pio.renderers.default = "notebook_connected"
# common pixel width shared by every figure in this report (see aspect() below)
width = 740

# data preparation

# static data – codes -> names etc.
# static data – codes -> names etc.

# PCS decision codes -> display labels.
# NOTE: 'R' (reject) and 'R-2nd' (second-round reject) both map to 'Reject',
# so the two are merged in every decision breakdown below. Insertion order
# matters: decisions.values() feeds the plot category order later on.
decisions = { 
    'C': 'Accept',
    'R': 'Reject',
    'R-2nd': 'Reject',
    'DR-S': 'Desk Reject (Scope)',
    'DR-P': 'Desk Reject (Plagiarism)',
}

# fill colors per decision label
# NOTE(review): not referenced by the visible plotting code — presumably kept
# for manual styling; confirm before removing.
decision_colors = {
    'Accept': '#82E0AA',
    'Reject': '#F1948A',
    'Desk Reject (Scope)': '#D7BDE2',
    'Desk Reject (Plagiarism)': '#F9E79F',
}

# area short codes (as exported in the submissions CSV) -> full area names;
# insertion order defines the area ordering used in the plots
areas = {
    'T&E': 'Theoretical & Empirical',
    'App': 'Applications',
    'S&R': 'Systems & Rendering',
    'R&I': 'Representations & Interaction',
    'DTr': 'Data Transformations',
    'A&D': 'Analytics & Decisions',
}

# bid codes from the bidding export -> readable bid labels
bid = { 
    '1': 'want',
    '2': 'willing',
    '3': 'reluctant',
    'C': 'conflict'
}

# keyword metadata table; code below relies on the columns
# 'Keyword', 'Short Name', 'Category', and 'Subcategory'
keywords = pd.read_csv("data/2021/keywords.csv", sep=';')

# fill colors per keyword category; only referenced in commented-out
# color_discrete_map arguments below
keyword_category_colors = {
    'Data Type': '#fce69e',
    'Contribution Theme': '#a9c0f2',
    'Application Area': '#dce9d4',
    'Topic': '#e0bab0'
}

# load the submissions table, indexed by anonymized paper ID, and expand the
# short decision / area codes into their human-readable names
submissions = (
    pd.read_csv("data/2021/anon_vis21b_submission.csv", index_col='Paper ID')
      .replace({'Decision': decisions, 'Area': areas})
)

# load bidding table and undo CSV encoding
# The export (after two skipped header rows) appears to have one row per
# paper plus one blank-ID row, and several columns per committee member
# ("<reviewer#> ..." headers) — TODO confirm against the raw CSV.
raw_bids = pd.read_csv("data/2021/anon_vis21b_committee_bidding.csv", skiprows=2)

raw_bids = (raw_bids
    .set_index('ID')
    .dropna(how = 'all')
    # "<reviewer#> <suffix>" column headers -> integer reviewer number
    .rename(columns = { x: int(x.split(' ')[0]) for x in raw_bids.columns[1:] })
    # the blank-ID row (NaN index) labels what each column holds; tag it 'type'
    .rename(index = lambda v: 'type' if np.isnan(v) else int(v))
    # after transposing: one row per reviewer column, one column per paper,
    # plus a 'type' column ('match'/'stat'/'bid') used to split the table below
    .transpose()
    .rename_axis(index = 'Reviewer')
    .rename_axis(columns = 'Paper ID')
)

# parse the bidding table into bids, match scores, and assignments

# match scores: the "match" rows, one float score per (reviewer, paper),
# with both axes sorted for a stable ordering
matchscores = raw_bids.query('type == "match"').drop(columns = 'type')
matchscores = (matchscores
    .sort_index(axis=0)
    .sort_index(axis=1)
    .astype(np.float32)
)

# assignments: the "stat" rows record each reviewer's role per paper;
# reshaped to long format with columns Reviewer / Paper ID / Role
assignments = raw_bids.query('type == "stat"').drop(columns = 'type').stack()
assignments = (assignments
    .reset_index()
    .rename(columns = {0: 'Role'})
    .replace({ 'Seco': 'Secondary', 'Prim': 'Primary' })
)

# bids: the "bid" rows, reshaped to long format with one row per
# (reviewer, paper) bid. The bid codes are decoded via the `bid` lookup
# table defined with the other static data above, instead of duplicating
# the same code->label mapping inline.
bids = (raw_bids
    .query('type == "bid"')
    .drop(columns = 'type')
    .stack()
    .reset_index()
    .rename(columns = {0: 'Bid'})
    .replace(bid)
)

# plot defaults

def _axis_style():
    """Return a fresh axis-style dict (one copy each for x and y)."""
    return dict(
        title = dict(
            font = dict(family='Fira Sans Medium', size=13),
            standoff = 10,
        ),
        gridcolor = 'lightgray',
        gridwidth = 1,
        automargin = True,
        fixedrange = True,
    )

# shared figure template: Fira Sans typography, transparent backgrounds,
# light gridlines, and non-zoomable axes
acc_template = go.layout.Template()
acc_template.layout = dict(
    font = dict(family='Fira Sans', color='black', size=13),
    title_font_size = 14,
    plot_bgcolor = 'rgba(255,255,255,0)',
    paper_bgcolor = 'rgba(255,255,255,0)',
    margin = dict(pad=10),
    xaxis = _axis_style(),
    yaxis = _axis_style(),
    legend = dict(title_font_family="Fira Sans Medium"),
    colorway = px.colors.qualitative.T10,
    hovermode = 'closest',
    hoverlabel = dict(
        bgcolor = "white",
        bordercolor = 'lightgray',
        font_color = 'black',
        font_family = 'Fira Sans',
    ),
)

# default bar styling: value labels centered inside the bars
acc_template.data.bar = [dict(
    textposition = 'inside',
    insidetextanchor = 'middle',
    textfont_size = 12,
)]

# make the template the process-wide plotly express default
px.defaults.template = acc_template

# global category orderings for all plots.
# decisions maps two codes ('R', 'R-2nd') to the same label 'Reject', so
# list(decisions.values()) would contain a duplicate; dict.fromkeys
# de-duplicates while preserving first-seen order.
px.defaults.category_orders = {
    'Decision': list(dict.fromkeys(decisions.values())),
    'Area': list(areas.values()),
    'Short Name': keywords['Short Name'].tolist()
}

# shared plotly config: static, non-interactive figures
config = dict(
    displayModeBar = False,
    scrollZoom = False,
    responsive = False
)

def aspect(ratio, base=900):
    """Return a figure size dict at the report's fixed width.

    The height is ratio * base pixels; base defaults to the historical
    900 px reference so existing aspect(r) calls are unchanged, but a
    different base can be passed to rescale all plots consistently.
    """
    return { 'width': width, 'height': int(ratio*base) }

# useful data sub-products

# k_all: one row per (submission, keyword) pair — the '; '-separated
# Keywords field is exploded and each keyword joined with its metadata row.
k_all = (submissions
    .join(submissions['Keywords'].str
        .split('; ', expand = True)
        .stack()
        .rename('Keyword')
    )
    .reset_index(level = 0)   # bring Paper ID back out of the index
    .merge(keywords, on='Keyword')
)

# k_cnt: usage count per keyword ('c') with metadata attached; keywords that
# were never specified do not appear here (inner merge)
k_cnt = (k_all
    .value_counts('Short Name', sort=False)
    .rename('c')
    .to_frame()
    .merge(keywords, on='Short Name')
)

# k_total: like k_cnt but the outer merge keeps never-used keywords too
# (their count is NaN); used by the "manual histogram" plots below
k_total = keywords.merge(
    k_all.value_counts(['Short Name'])
         .reset_index(),
    how = 'outer'
)

Overview

This report summarizes the process, findings, and recommendations by the VIS Area Curation Committee (ACC) regarding the areas and keywords used for paper submissions to IEEE VIS 2021. According to the Charter, the goal of this committee is to analyze and report how submissions made use of the areas and keywords to describe their contribution. It is important to understand when these descriptors no longer adequately cover the breadth of research presented at VIS.

This report is generated by members of the ACC for the current year, and prepared for the VSC. Upon review, it will be linked from the IEEE VIS website. The conclusions and discussion points are based on submission and reviewer data from IEEE VIS 2021. The report and analysis performed is focused on the use of keywords, areas, and reviewer matching. Thus, there are likely other aspects of conference organization which are not covered (but could be considered).

The report is broken down into the following sections. First, the data and analysis process is described. It shows which data we used, where it is stored, and how it is obtained. These processes can be adapted for future years of this committee. Second, a discussion of key findings from our analysis. These are only highlights, with the complete analyses linked. Finally, it includes a collection of recommendations and noteworthy findings which should be “watched” next year to see if trends emerge.

Data and Process

The data used to perform this analysis is a combination of paper submission data and reviewer bidding data. Both sets were anonymized to minimize the ability to identify IPC members, authors, or reviewers. The scripts used to export the data from PCS and anonymize it can be found here.

The analysis of the data in this year uses the anonymized CSV files obtained directly from PCS. You can see the source code used to process and generate the plots in this document by clicking on the “Code” buttons, which will fold out the Python code used.

In order to facilitate longitudinal studies of this data, we are also providing a sqlite database with the 2021 data in an attempt to make it easier to incorporate 2022 data and so on. The code that generates this database can be found here.

Data Highlights

We analyzed anonymized data containing information about the full paper submissions to VIS 2021, the reviews of these submissions, and the IPC bidding preferences. We analyzed this data to understand how well the areas and keywords characterize the body of work submitted this year. We also analyzed the IPC bidding information to understand how well the expertise of the IPC members covers the submissions. Below, we show highlights of our findings.

Note that in the analysis that follows, the submission/paper IDs and reviewer IDs are anonymized through a randomizer, and are not the IDs used in PCS submissions and reviewers.

Submissions per Area. We wanted to understand how submissions were distributed by area, including acceptance decisions. Submissions to each area were within reasonable upper and lower limits, and decisions did not appear partial to any individual area.

Code

# tally (area, decision) combinations across all submissions
tmp = submissions.value_counts(['Area', 'Decision']).reset_index()
tmp = tmp.rename(columns = {0: 'count'})

# one horizontal bar per area, stacked and colored by decision
fig = px.bar(tmp,
    y = 'Area',
    x = 'count',
    color = 'Decision',
    orientation = 'h',
    barmode = 'stack',
    text = 'count',
    custom_data = ['Decision'],
).update_traces(
    hovertemplate = '%{x} submissions in %{y} have decision %{customdata[0]}<extra></extra>',
).update_layout(
    title = 'Submissions by area',
    xaxis_title = 'Number of Submissions',
    **aspect(0.35)
).show(config=config)
2424191814117461585640353221020406080100Analytics & DecisionsData TransformationsRepresentations & InteractionSystems & RenderingApplicationsTheoretical & Empirical
DecisionAcceptRejectDesk Reject (Scope)Desk Reject (Plagiarism)Submissions by areaNumber of SubmissionsArea

Keywords Used. We also analyzed the frequency of how often keywords were used in the submissions. The frequency of keywords used is reasonable. The one exception which should be watched for next year is “Application”, which may require further specification or description.

Code

# manual histogram: k_total retains keywords with zero uses (outer merge),
# so every keyword appears on the axis even if no submission specified it
px.bar(k_total,
    y = 0,
    x = 'Short Name',
    color = 'Category',
    # color_discrete_map=keyword_category_colors,
).update_layout(
    title = 'Frequency of keywords across submissions',
    xaxis_title = 'Keyword',
    yaxis_title = 'Number of Submissions',
    legend_title = 'Keyword Category',
    xaxis_dtick = 1,
    yaxis_dtick = 20,
    xaxis_tickfont_size = 8,
    hovermode = 'closest',
    **aspect(0.4)
).update_traces(
    hovertemplate = "'%{x}' specified in %{y} submissions<extra></extra>",
).show(config=config)
GeospatialNetworkHDModelsScalarImageVideoTabularTimeTextVector_TensorOtherDataNADataAlgorithmDataAbstrDatasetsDeploymentMethodologyApplicationGuidelinesInteractionWorkflowSystemsSoftwareSTARDomain_TaskTheoryVisDesignOtherContribCompSystemsLifeBioMLStatsModelScienceEngrSocHumOtherAppNAAppCollabColorStorytellingAnalyzeDecideGenPublicMixedInitPerceptionPersonalVisClusterAggDataMgmtDimRedFeaturesBigDataMLMathCompTopIsosurfacesFlowVolumesCompBenchmarkHumanQualHumanQuantMotionArtMapsChartsComparisonGraphicsMultiViewImageProcessingDisplaysMultiResHardwareUncertaintyOtherTopic020406080100
Keyword CategoryData TypeContribution ThemeApplication AreaTopicFrequency of keywords across submissionsKeywordNumber of Submissions

Unified PC Expertise. Finally, we want to highlight that bidding information from the PC members indicate that moving to a unified PC appears to provide ample coverage of expertise for the papers submitted.

Code

# count bids per (paper, bid kind) and keep only the positive categories
tmp = (bids
    .value_counts(['Paper ID', 'Bid'], sort=False)
    .reset_index()
    .rename(columns = {0: 'Number of Bids'})
    .loc[lambda df: df.Bid.isin(['want', 'willing'])]
)

# papers on the x axis ordered by total positive bids, labels hidden
px.bar(tmp,
    x = 'Paper ID',
    y = 'Number of Bids',
    color = 'Bid'
).update_traces(
    hovertemplate = 'Paper %{x} received %{y} "%{fullData.name}" bids.<extra></extra>',
).update_layout(
    title = 'Positive Bids per Paper',
    xaxis_type = 'category',
    xaxis_categoryorder = 'total descending',
    xaxis_showticklabels = False,
    **aspect(0.4),
).show(config=config)
010203040
BidwantwillingPositive Bids per PaperPaper IDNumber of Bids

Cross-area reviewing among the unified PC. One concern for the unified PC was that PC members would be isolated to their respective areas, further increasing the fragmentation of the community. From the number of areas each PC member reviewed in, however, this does not appear to be the case:

Code

# number of distinct areas each PC member was assigned submissions from
tmp = (assignments
    .merge(submissions, on='Paper ID')
    .groupby('Reviewer')
    .apply(lambda grp: len(grp['Area'].unique()))
    .reset_index())

px.histogram(tmp,
    x = 0,
).update_layout(
    bargap = .1,
    xaxis_title = 'Number of Areas',
    yaxis_title = 'Number of PC members',
    **aspect(0.4),
).update_traces(
    hovertemplate = '%{y} PC members were assigned submissions from %{x} area(s)',
).show(config=config)
12345020406080
Number of AreasNumber of PC members

Recommendations

The ACC has the following recommendations with regards to Areas, Keywords, and Bidding for VIS 2022. We also have a list of “watchlist items” that we recommend keeping under observation for future years. We do not find these definitive enough to recommend changes, but they did create considerable discussion among the ACC and should be re-visited next year.

Areas

After reviewing the paper areas, we suggest some small changes to the descriptions and example papers. At this time, we do not recommend retiring areas or adding new ones. Specific changes recommended to example papers are:

  • We moved the following paper to Area 4 (Representations & Interaction):
    • A. Srinivasan and J. Stasko. “Orko: Facilitating Multimodal Interaction for Visual Network Exploration and Analysis.” IEEE Transactions on Visualization and Computer Graphics, 24(1): 2018.
  • We added the following paper to Area 5 (Data Transformations):
    • H. Strobelt, D. Oelke, C. Rohrdantz, A. Stoffel, D. A. Keim and O. Deussen “Document Cards: A Top Trumps Visualization for Documents.” IEEE Transactions on Visualization and Computer Graphics, vol. 15, no. 6, pp. 1145-1152, 2009.

Keywords

Based on our analysis of keyword frequencies and feedback we do not recommend removing keywords at this time, due primarily to the short review period so far. We recommend the following additions to keywords and keyword descriptions:

  • To the Data Types category: Sets, Ensemble models
  • To the Contribution (General) category: Ontology
  • To the Application Area category: Chemistry (to Life Sciences….), Astronomy (to Physical & Environmental…), Law, Economics, and Social Media (to Social Science…)
  • In the Topic category, Stats & Math to become Stats, Math & ML
  • To the Topic category:
    • Accessibility (in Human Factors),
    • Deep learning (in Stats&Math&ML),
    • Explainable AI (in Stats&Math&ML),
    • Multi-field/multi-dimensional (to (Spatial Field….),
    • Graph visualization/analysis and Glyph-based techniques (to General Visualization….)

We put forward the following observations and questions for continued consideration:

  • The keyword Application Motivated Visualization may be oversubscribed, appearing in about 25% of the submissions (110/434 overall, 30/110 of accepted papers, 80/324 of rejected papers). In contrast, the next most popular keywords Data Analysis, Reasoning, Problem Solving, and Decision Making and Machine Learning appear in 90 and 73 submissions, respectively. This calls for further examination. Specifically, if we wish to split or clarify this keyword, what data do we base that revision on? We recommend analyzing this again next year and seeing if it continues to be used too frequently.
  • We discussed the issue of whether keywords could be used to indicate specific domain knowledge required to appropriately review some submissions and how that specific knowledge should be signalled by the author and used in the assignment process. One example is a potential keyword for sports visualization (appearing multiple times in the keyword feedback as a requested new keyword). If such a keyword were created and used in assignments, would reviewer expertise in cricket (for example) actually be useful in reviewing a paper about tennis? Specifically, there is an issue with the level of granularity of expertise in this domain (and presumably in other domains). This calls for further thought, hence we do not recommend adding a keyword for sports visualization at this time. We also observe that there is currently no way for reviewers to indicate expertise in a specific domain (for instance, tennis).
  • We suggest that decisions about adding keywords consider the cost of such additions in the submission, bidding, and other processes.

General Observations and Reflection

Based on the process this year, we have the following observations and reflections on various aspects of this committee and the overarching goals.

Data Collection: Submission, review, and bidding data is required for the analysis we performed. We have created a script to anonymize the PCS exports. However, there are challenges with OPCs having conflicts with papers, so the exports may not be complete. Further, generating this data falls in an already busy time for the paper chairs and adds to their workload. In the future, perhaps having a separate committee (or person) generate these data exports may be preferred to offload some work from the paper chairs. One recommendation discussed was to have the person who manages PCS do it (with permission from the OPCs).

IPC Participation Data Usage: In the future, VIS should ask IPC members to acknowledge that their bidding data will be used by the ACC for operational improvements to VIS.

Match Score Transparency: We recommend VIS produce documentation on how the “match score” is computed for the IPC. We recommend this because we currently have no technical basis on which to evaluate the quality of the current matches; if the match score cannot be adequately explained or otherwise justified, we recommend moving to replace the current matching system with one that can be explained.

Full Analysis

(NB: Some of the plots shown above are repeated here for the sake of completeness.)

Submissions

How many papers were submitted to each area, and what is the breakdown of decisions?

Code

# (same breakdown as in the highlights section, repeated for completeness)
# count submissions per (area, decision) combination
tmp = submissions.value_counts(['Area', 'Decision']).reset_index()
tmp = tmp.rename(columns = {0: 'count'})

fig = px.bar(tmp,
    y = 'Area',
    x = 'count',
    color = 'Decision',
    orientation = 'h',
    barmode = 'stack',
    text = 'count',
    custom_data = ['Decision'],
).update_traces(
    hovertemplate = '%{x} submissions in %{y} have decision %{customdata[0]}<extra></extra>',
).update_layout(
    title = 'Submissions by area',
    xaxis_title = 'Number of Submissions',
    **aspect(0.35)
).show(config=config)
2424191814117461585640353221020406080100Analytics & DecisionsData TransformationsRepresentations & InteractionSystems & RenderingApplicationsTheoretical & Empirical
DecisionAcceptRejectDesk Reject (Scope)Desk Reject (Plagiarism)Submissions by areaNumber of SubmissionsArea

Keywords

How often was a particular keyword specified?

Code

# build the treemap node list manually:
# tc: one root node per keyword category, all parented under 'All'
tc = [ dict(n=c, p='All', f=c) for c in k_cnt['Category'].unique() ]
# ts: subcategory nodes, only where the subcategory differs from its category
ts = [ dict(n=s, p=c,  f=c) for _, c, s in k_cnt[['Category', 'Subcategory']].drop_duplicates().itertuples() if c != s ]
# tl: keyword leaves with their usage count; parented to the subcategory when
# one exists, otherwise directly to the category ('f' carries the category
# for coloring)
tl = [ dict(n=r['Short Name'], p=r.Category if r.Category == r.Subcategory else r.Subcategory, c=r.c, f=r.Category) for _, r in k_cnt.iterrows() ]

# inner nodes have no count of their own; fillna(0) lets plotly sum leaves
tree = pd.DataFrame(tc + ts + tl).fillna(0)

px.treemap(tree,
    names = tree.n,
    parents = tree.p,
    values = tree.c,
    color = tree.f,   # color tiles by top-level keyword category
    # color_discrete_map=keyword_category_colors,
).update_layout(
    margin = {'t': 0, 'b': 0, 'l': 0, 'r': 0},
    uniformtext=dict(minsize=10),
    **aspect(0.4)
).update_traces(
    hovertemplate = "'%{label}' specified in %{value} submissions<extra></extra>",
    marker_depthfade = 'reversed',
).show(config=config)
AllTopicContribution ThemeData TypeApplication AreaGeneral Visualization MethodsHuman FactorsStats & Math, Machine Learning, Data Management Methods & AlgorithmsEvaluation Methods and MethodologiesSpatial Field Methods & AlgorithmsGeneral ContributionsTimeNetworkHDImageVideoTabularGeospatialScalarTextModelsNADataOtherDataVector_TensorMLStatsModelSocHumNAAppLifeBioScienceEngrOtherAppCompSystemsMultiViewChartsComparisonDisplaysMultiResArtUncertaintyMotionOtherTopicGraphicsMapsImageProcessingHardwareAnalyzeDecideStorytellingGenPublicPerceptionMixedInitColorPersonalVisCollabMLClusterAggFeaturesDimRedBigDataDataMgmtMathHumanQuantHumanQualCompBenchmarkFlowCompTopVolumesApplicationVisDesignAlgorithmTheoryInteractionWorkflowMethodologySoftwareDataAbstrSystemsGuidelinesDomain_TaskDatasetsSTARDeploymentOtherContrib

Count of keywords

Code

# manual histogram (repeated from the highlights section): k_total keeps
# never-used keywords via its outer merge, so all keywords show on the axis
px.bar(k_total,
    y = 0,
    x = 'Short Name',
    color = 'Category',
    # color_discrete_map=keyword_category_colors,
).update_layout(
    title = 'Frequency of keywords across submissions',
    xaxis_title = 'Keyword',
    yaxis_title = 'Number of Submissions',
    legend_title = 'Keyword Category',
    xaxis_dtick = 1,
    yaxis_dtick = 20,
    xaxis_tickfont_size = 8,
    hovermode = 'closest',
    **aspect(0.4)
).update_traces(
    hovertemplate = "'%{x}' specified in %{y} submissions<extra></extra>",
).show(config=config)
GeospatialNetworkHDModelsScalarImageVideoTabularTimeTextVector_TensorOtherDataNADataAlgorithmDataAbstrDatasetsDeploymentMethodologyApplicationGuidelinesInteractionWorkflowSystemsSoftwareSTARDomain_TaskTheoryVisDesignOtherContribCompSystemsLifeBioMLStatsModelScienceEngrSocHumOtherAppNAAppCollabColorStorytellingAnalyzeDecideGenPublicMixedInitPerceptionPersonalVisClusterAggDataMgmtDimRedFeaturesBigDataMLMathCompTopIsosurfacesFlowVolumesCompBenchmarkHumanQualHumanQuantMotionArtMapsChartsComparisonGraphicsMultiViewImageProcessingDisplaysMultiResHardwareUncertaintyOtherTopic020406080100
Keyword CategoryData TypeContribution ThemeApplication AreaTopicFrequency of keywords across submissionsKeywordNumber of Submissions

How are keywords distributed across areas?

Code

# do a manual histogram to include non-specified keywords
# NOTE: this rebinds k_cnt (first defined in the data sub-products section).
# The cross join enumerates every (keyword, area) combination so that
# combinations with zero submissions survive the outer merge.
k_cnt = keywords.merge(
    pd.DataFrame(areas.values(), columns = ['Area']), 
    how = 'cross'
).merge(
    k_all
        .value_counts(['Short Name', 'Area'])
        .reset_index(),
    how = 'outer'
).fillna(1e-10) # needed for sorting, Plotly bug?

# stacked bars per keyword, one segment per area, ordered by total count
px.bar(k_cnt,
    x = 'Short Name',
    y = 0,
    color = 'Area',
    custom_data = ['Area']
).update_traces(
    hovertemplate = 'Keyword "%{x}" specified by %{y} submissions from area "%{customdata}"<extra></extra>'
).update_layout(
    barmode = 'stack',
    yaxis_title = 'Number of Submissions',
    xaxis_dtick = 1,
    xaxis_tickfont_size = 8,
    xaxis_fixedrange = True,
    yaxis_fixedrange = True,
    xaxis_categoryorder = 'total descending',
    title = 'Frequency of keywords across submissions, by area',
    legend_title = 'Area',
    **aspect(0.4)
).show(config=config)
ApplicationAnalyzeDecideMLVisDesignAlgorithmMLStatsModelHumanQuantTimeHumanQualMultiViewSocHumChartsNAAppStorytellingTheoryNetworkClusterAggHDImageVideoGenPublicWorkflowInteractionFeaturesTabularPerceptionMethodologySoftwareDimRedLifeBioComparisonScalarGeospatialBigDataTextScienceEngrDisplaysMixedInitOtherAppDataAbstrModelsNADataMultiResOtherDataCompSystemsSystemsGuidelinesArtUncertaintyColorDomain_TaskMotionDataMgmtFlowOtherTopicCompTopVector_TensorGraphicsCompBenchmarkDatasetsPersonalVisMapsImageProcessingSTARCollabMathVolumesOtherContribDeploymentHardwareIsosurfaces020406080100
AreaTheoretical & EmpiricalApplicationsSystems & RenderingRepresentations & InteractionData TransformationsAnalytics & DecisionsFrequency of keywords across submissions, by areaShort NameNumber of Submissions

How many submissions specified a given number of keywords?

Code

# count the '; '-separated keywords on each submission.
# (The original chained .sort_values() onto the Series before assigning it;
# pandas realigns by index during column assignment, so the sort was a
# no-op and has been removed.)
submissions['Number of Keywords'] = submissions['Keywords'].apply(lambda kw: len(kw.split('; ')))
tmp = submissions.value_counts(['Number of Keywords', 'Area']).reset_index()

# stacked bars: submissions per keyword count, split by area
px.bar(tmp,
    x = 'Number of Keywords', 
    y = 0,
    barmode = 'stack',
    color = 'Area',
    custom_data=['Area'],
    labels = { '0': "Number of Submissions" },
).update_traces(
    hovertemplate = '%{y} submissions specified %{x} keywords in area "%{customdata}"<extra></extra>',
).update_layout(
    xaxis_dtick = 1,
    title = 'Keyword count per submission',
    **aspect(0.4)
).show(config=config)
1234567891011121314151617181920020406080100
AreaTheoretical & EmpiricalApplicationsSystems & RenderingRepresentations & InteractionData TransformationsAnalytics & DecisionsKeyword count per submissionNumber of KeywordsNumber of Submissions

Does keyword count correlate with decision?

Code

# TODO: group 10+ together
# (the .map below already bins counts >= 10 into a single '>=10' bucket)

# per (keyword-count bucket, decision): column 0 = count, column 1 = fraction
# within the bucket, column 2 = bucket total; all three feed the hover text.
# Relies on the 'Number of Keywords' column added to submissions above.
tmp = (submissions
    .assign(**{'Number of Keywords':
        submissions['Number of Keywords']
            .map(lambda x: str(x) if x < 10 else '>=10')
    })
    .value_counts(['Number of Keywords', 'Decision'])
    .groupby(level=0)
    .apply(lambda g: pd.DataFrame({0: g, 1: g/g.sum(), 2:g.sum()}))
    .reset_index()
)

px.bar(tmp,
    x = 'Number of Keywords', 
    y = 0,
    barmode = 'stack',
    color = 'Decision',
    custom_data=['Decision', 0, 2],
    labels = { '0': "Number of Submissions" },
).update_traces(
    hovertemplate = '%{customdata[1]} (%{y}) of %{customdata[2]} submissions with %{x} keywords had decision "%{customdata[0]}"<extra></extra>',
).update_layout(
    xaxis_dtick = 1,
    xaxis_type = 'category',
    xaxis_categoryorder = 'category ascending',
    yaxis_title = 'Submissions',
    title = 'Decisions by keyword count',
    **aspect(0.3)
).show(config=config)
123456789>=10050100
DecisionAcceptRejectDesk Reject (Scope)Desk Reject (Plagiarism)Decisions by keyword countNumber of KeywordsSubmissions

Do specific keywords correlate with decision?

Code

# do a manual histogram to include non-specified keywords
# k_dec: per (keyword, decision) — raw count (col 0), percentage within the
# keyword (col 1), and the keyword's total count (col 2) for the hover text
k_dec = (k_all
    .groupby(['Short Name', 'Decision'])
    .size()
    .groupby(level = 0)
    .apply(lambda g: pd.DataFrame({0: g, 1: 100*g/g.sum(), 2:g.sum()}))
    .reset_index()
)

px.bar(k_dec,
    x = 'Short Name',
    y = 0,
    color = 'Decision',
    custom_data = ['Decision', 1, 2],
).update_layout(
    xaxis_title = 'Keyword',
    yaxis_title = '',
    xaxis_dtick = 1,
    xaxis_tickfont_size = 8,
    title = 'Decision by presence of keyword',
    **aspect(0.4)
).update_traces(
    # fixed: the closing quote after the decision name was missing
    hovertemplate = "%{y} of %{customdata[2]} submissions (%{customdata[1]:.2f}%) specifying keyword '%{x}' had decision '%{customdata[0]}'<extra></extra>",
).show(config=config)
GeospatialNetworkHDModelsScalarImageVideoTabularTimeTextVector_TensorOtherDataNADataAlgorithmDataAbstrDatasetsDeploymentMethodologyApplicationGuidelinesInteractionWorkflowSystemsSoftwareSTARDomain_TaskTheoryVisDesignOtherContribCompSystemsLifeBioMLStatsModelScienceEngrSocHumOtherAppNAAppCollabColorStorytellingAnalyzeDecideGenPublicMixedInitPerceptionPersonalVisClusterAggDataMgmtDimRedFeaturesBigDataMLMathCompTopIsosurfacesFlowVolumesCompBenchmarkHumanQualHumanQuantMotionArtMapsChartsComparisonGraphicsMultiViewImageProcessingDisplaysMultiResHardwareUncertaintyOtherTopic020406080100
DecisionAcceptRejectDesk Reject (Scope)Desk Reject (Plagiarism)Decision by presence of keywordKeyword

How often are keywords “esoteric”, i.e. used alone?

Code

# per keyword: distribution of how many other keywords accompany it.
# Relies on the 'Number of Keywords' column added to submissions earlier.
tmp = (k_all
    .set_index('Paper ID')
    .merge(submissions)
    .value_counts(['Short Name', 'Category', 'Number of Keywords'])
    .reset_index()
)
# a keyword's co-keywords are all other keywords on the same submission
tmp['Number of Co-Keywords'] = tmp['Number of Keywords'] - 1

px.box(tmp,
    x = 'Short Name',
    y = 'Number of Co-Keywords',
    color = 'Category',
    # color_discrete_map=keyword_category_colors,
).update_traces(
    width = .5,
    line_width = 1,
).update_layout(
    xaxis_dtick = 1,
    xaxis_tickfont_size = 8,
    **aspect(0.4)
).show(config=config)
GeospatialNetworkHDModelsScalarImageVideoTabularTimeTextVector_TensorOtherDataNADataAlgorithmDataAbstrDatasetsDeploymentMethodologyApplicationGuidelinesInteractionWorkflowSystemsSoftwareSTARDomain_TaskTheoryVisDesignOtherContribCompSystemsLifeBioMLStatsModelScienceEngrSocHumOtherAppNAAppCollabColorStorytellingAnalyzeDecideGenPublicMixedInitPerceptionPersonalVisClusterAggDataMgmtDimRedFeaturesBigDataMLMathCompTopIsosurfacesFlowVolumesCompBenchmarkHumanQualHumanQuantMotionArtMapsChartsComparisonGraphicsMultiViewImageProcessingDisplaysMultiResHardwareUncertaintyOtherTopic05101520
CategoryContribution ThemeApplication AreaTopicData TypeShort NameNumber of Co-Keywords

How often are pairs of keywords specified together?

Code

# all unordered keyword pairs per submission, tagged with the decision.
# The groupby-apply yields a (Paper ID, row) MultiIndex, so the join picks
# up each submission's Decision via the Paper ID level.
k_pairs = (k_all
    .groupby('Paper ID')
    .apply(lambda g: pd.DataFrame(itertools.combinations(g['Short Name'].values, 2)))
    .join(submissions['Decision'])
)

# the 40 most frequent pairs overall
tmp = k_pairs.groupby([0,1]).size().nlargest(40)
# restrict to those pairs, label them "A + B", and count per decision
tmp = (
    k_pairs
    .set_index([0,1])
    .loc[tmp.index]
    .assign(p=lambda df: [' + '.join(v) for v in df.index.values])
    .value_counts(['p', 'Decision'], sort=False)
    .rename('c')
    .reset_index()
)

px.bar(tmp,
    x = 'p',
    y = 'c',
    color = 'Decision',
    custom_data = ['Decision'],
).update_layout(
    xaxis_title = 'Keyword Pair',
    yaxis_title = 'Submissions',
    xaxis_dtick = 1,
    xaxis_categoryorder = 'total descending',
    xaxis_tickfont_size = 8,
    title = 'Top 40 keyword pairs',
    **aspect(0.4)
).update_traces(
    hovertemplate = '%{y} submissions with keyword pair "%{x}" had decision "%{customdata[0]}"<extra></extra>',
).show(config=config)
AnalyzeDecide + ApplicationMLStatsModel + ApplicationTime + ApplicationAnalyzeDecide + MLStatsModelPerception + HumanQuantML + MLStatsModelHumanQual + HumanQuantML + AnalyzeDecideMultiView + ApplicationHD + DimRedML + ApplicationVisDesign + ApplicationAnalyzeDecide + MultiViewLifeBio + ApplicationImageVideo + ApplicationCharts + AnalyzeDecideApplication + GeospatialAnalyzeDecide + FeaturesVisDesign + AnalyzeDecideNAData + NAAppCharts + HumanQuantAnalyzeDecide + HumanQuantTime + AnalyzeDecideML + MultiViewImageVideo + MLDimRed + ClusterAggApplication + SocHumApplication + HumanQualApplication + FeaturesApplication + ComparisonAnalyzeDecide + WorkflowAnalyzeDecide + HumanQualStorytelling + GenPublicScienceEngr + ApplicationModels + MLStatsModelMLStatsModel + MultiViewML + FeaturesImageVideo + MLStatsModelHumanQual + StorytellingAlgorithm + NAApp0102030
DecisionAcceptRejectDesk Reject (Scope)Desk Reject (Plagiarism)Top 40 keyword pairsKeyword PairSubmissions

Code

# pairwise co-occurrence counts as a full keyword x keyword matrix, with
# rows/columns ordered like the keywords table; pairs never seen become 0
cooc = (k_pairs
    .groupby([0,1])
    .size()
    .unstack()
    .reindex(index = keywords['Short Name'], columns = keywords['Short Name'])
    .fillna(0)
)

# symmetrize (combinations produced each pair in only one orientation) and
# blank the diagonal so self-pairs do not dominate the color scale
cooc = (cooc + cooc.T)
np.fill_diagonal(cooc.values, None)

px.imshow(cooc, 
    color_continuous_scale='portland',
).update_traces(
    connectgaps = False,
    hoverongaps = False,   # no hover on the blanked diagonal
    hovertemplate = "Keywords '%{x}' and '%{y}' are jointly specified in %{z} submissions<extra></extra>",
    colorbar_title = 'Number Of Submissions',
).update_layout(    
    xaxis_dtick = 1,
    xaxis_tickfont_size = 7,
    xaxis_title = 'Keyword',
    yaxis_dtick = 1,
    yaxis_tickfont_size = 7,
    yaxis_title = 'Keyword',
    hovermode = 'closest',
    xaxis_showgrid = False,
    yaxis_showgrid = False,
    title = 'Co-occurrence of keywords',
    **aspect(.7)
).show(config=config)

Code

# All 3-keyword combinations per submission, joined with the final decision.
k_triples = (k_all
    .groupby('Paper ID')
    .apply(lambda g: pd.DataFrame(itertools.combinations(g['Short Name'].values, 3)))
    .join(submissions['Decision'])
)

# Restrict to the 40 most frequent triples, then count submissions per
# (triple, decision) pair for the stacked bars.
tmp = k_triples.groupby([0,1,2]).size().nlargest(40)
tmp = (
    k_triples
    .set_index([0,1,2])
    .loc[tmp.index]
    .assign(p=lambda df: [' + '.join(v) for v in df.index.values])
    .value_counts(['p', 'Decision'], sort=False)
    .rename('c')
    .reset_index()
)

px.bar(tmp,
    x = 'p',
    y = 'c',
    color = 'Decision',
    custom_data = ['p', 'Decision'],
).update_layout(
    xaxis_title = 'Keyword Triple',
    yaxis_title = 'Submissions',
    xaxis_dtick = 1,
    xaxis_categoryorder = 'total descending',
    xaxis_tickfont_size = 8,
    title = 'Top 40 keyword triples',
    **aspect(0.4)
).update_traces(
    # Fixed: this chart shows keyword *triples* (not pairs), and the decision
    # is customdata[1] — customdata[0] is the triple string 'p' (same as x).
    hovertemplate = '%{y} submissions with keyword triple "%{x}" had decision "%{customdata[1]}"<extra></extra>',
).show(config=config)
ML + MLStatsModel + ApplicationML + AnalyzeDecide + MLStatsModelPerception + HumanQual + HumanQuantImageVideo + MLStatsModel + ApplicationML + AnalyzeDecide + FeaturesML + AnalyzeDecide + ApplicationCharts + AnalyzeDecide + ApplicationAnalyzeDecide + MultiView + ApplicationAnalyzeDecide + MLStatsModel + MultiViewAnalyzeDecide + Application + FeaturesTime + AnalyzeDecide + ApplicationMLStatsModel + Application + GeospatialML + MultiView + ApplicationML + MLStatsModel + MultiViewML + ClusterAgg + FeaturesHumanQual + Storytelling + HumanQuantCharts + Perception + HumanQuantCharts + HumanQual + HumanQuantAnalyzeDecide + MLStatsModel + ApplicationAlgorithm + HD + DimRedML + AnalyzeDecide + MultiViewCharts + AnalyzeDecide + HumanQualCharts + AnalyzeDecide + ClusterAggAnalyzeDecide + Workflow + HumanQualAnalyzeDecide + HumanQual + HumanQuantPerception + Storytelling + HumanQuantML + Models + MLStatsModelML + MLStatsModel + ClusterAggML + DimRed + MLStatsModelML + DimRed + ClusterAggHD + DimRed + ClusterAggCharts + AnalyzeDecide + HumanQuantApplication + HumanQual + HumanQuantAnalyzeDecide + MLStatsModel + HumanQualAnalyzeDecide + Features + ComparisonAnalyzeDecide + DimRed + ClusterAggAnalyzeDecide + Application + WorkflowAnalyzeDecide + Application + Domain_TaskAnalyzeDecide + Application + ComparisonAlgorithm + BigData + Scalar0510
DecisionAcceptRejectDesk Reject (Scope)Top 40 keyword triplesKeyword TripleSubmissions

What is the distribution of match scores by keyword?

Code

# Long-form (submission, reviewer, score) records, keeping only valid
# scores (> -1 sentinel), tagged with each submission's keywords.
tmp = (
    matchscores.T
    .stack()
    .rename('Score')
    .loc[lambda s: s > -1.0]
    .reset_index()
    .merge(k_all[['Paper ID', 'Short Name', 'Category']], on='Paper ID')
    .reset_index()
)

fig = px.box(tmp,
    x='Short Name',
    y='Score',
    color='Category',
)
fig.update_layout(
    xaxis_dtick=1,
    xaxis_tickfont_size=8,
    **aspect(0.4)
)
fig.update_traces(
    width=.5,
    line_width=1,
)
fig.show(config=config)
GeospatialNetworkHDModelsScalarImageVideoTabularTimeTextVector_TensorOtherDataNADataAlgorithmDataAbstrDatasetsDeploymentMethodologyApplicationGuidelinesInteractionWorkflowSystemsSoftwareSTARDomain_TaskTheoryVisDesignOtherContribCompSystemsLifeBioMLStatsModelScienceEngrSocHumOtherAppNAAppCollabColorStorytellingAnalyzeDecideGenPublicMixedInitPerceptionPersonalVisClusterAggDataMgmtDimRedFeaturesBigDataMLMathCompTopIsosurfacesFlowVolumesCompBenchmarkHumanQualHumanQuantMotionArtMapsChartsComparisonGraphicsMultiViewImageProcessingDisplaysMultiResHardwareUncertaintyOtherTopic00.20.40.60.81
CategoryData TypeContribution ThemeTopicApplication AreaShort NameScore

What is the number of “high” match scores, per submission?

Code

# Match scores per submission, restricted to decided (Accept/Reject)
# papers, with the -1 sentinel masked out.
excluded = submissions.query('Decision not in ["Accept", "Reject"]').index
tmp = matchscores.T.drop(excluded).where(lambda x: x > -1.0, None)

# NOTE: `threshold` and `tmp` are reused by later cells.
threshold = [0.5, 0.7, 0.9]

# Count, per submission, how many reviewers clear each threshold.
tmp = pd.concat(
    [(tmp >= q).sum(axis=1).rename('>= %.1f' % q) for q in threshold],
    axis=1,
)

fig = px.bar(tmp,
    barmode='overlay',
    opacity=1,
)
fig.update_layout(
    legend_title='Reviewer Matches',
    bargap=0.1,
    xaxis_type='category',
    xaxis_tickfont_size=8,
    yaxis_title='Count',
    **aspect(0.4)
)
fig.update_traces(
    hovertemplate='Submission %{x} has %{y} matches %{fullData.name}<extra></extra>'
)
fig.show(config=config)
09182736455463728191100109118127137146155164173182192202211221230239248258267277286296305314323332341350359368377386395404413422431440050100150
Reviewer Matches>= 0.5>= 0.7>= 0.9Paper IDCount

Which are the 10 submissions with the lowest number of “good” (>= 0.5) match scores?

Code

# The ten submissions with the fewest "good" (>= 0.5) reviewer matches.
ind = tmp['>= 0.5'].nsmallest(10).index

# Positive ("want"/"willing") bid counts for just those submissions.
pos_bids = (bids
    .query('`Paper ID` in @ind and Bid in ["willing", "want"]')
    .value_counts(['Paper ID'])
    .rename("Pos. Bids")
)

# Full keyword name -> short form, for compact display.
short_names = keywords.set_index('Keyword')['Short Name']

table = (tmp
    .merge(pos_bids, on='Paper ID')
    .merge(submissions, on='Paper ID')
    .sort_values('>= 0.5')
    .assign(Keywords=lambda df: df.Keywords
        .str.split('; ')
        .apply(lambda ks: ', '.join(short_names[k] for k in ks)))
    .rename(columns={'Number of Keywords': '# Keywords'})
)
table
>= 0.5 >= 0.7 >= 0.9 Pos. Bids Decision Keywords Area # Keywords
Paper ID
287 0 0 0 14 Reject OtherContrib, OtherTopic Theoretical & Empirical 2
258 50 31 11 10 Accept Vector_Tensor, CompTop, Flow Data Transformations 3
319 52 28 28 27 Reject Flow Data Transformations 1
333 68 23 7 41 Accept Storytelling, Motion, Art Representations & Interaction 3
435 70 24 7 14 Reject Color, ClusterAgg, Flow Theoretical & Empirical 3
179 71 20 10 10 Accept ScienceEngr, CompTop Applications 2
202 71 20 10 12 Reject ScienceEngr, CompTop Data Transformations 2
407 71 34 6 18 Reject Scalar, CompSystems, Volumes, Graphics, ImageP... Systems & Rendering 5
200 73 29 8 22 Reject Vector_Tensor, Flow, Comparison, Uncertainty Theoretical & Empirical 4
37 74 15 5 32 Reject ImageVideo, SocHum, GenPublic, ML, Art Applications 5

What is the number of “high” match scores, per keyword?

Code

# Match scores for decided (Accept/Reject) papers, -1 sentinel masked.
tmp = (matchscores.T
    .drop(submissions
        .query('Decision not in ["Accept", "Reject"]')
        .index
    )
    .where(lambda x: x > -1.0, None)
)

# Attach each submission's keywords (one row per keyword occurrence).
labels = k_all.set_index('Paper ID')[['Short Name']]
tmp = (labels
    .merge(tmp, left_index=True, right_index=True, how='inner')
    .set_index('Short Name')
)

# Average count of reviewers clearing each threshold, per keyword.
tmp = (pd
    .concat(
        [(tmp >= q).sum(axis=1).rename('>= %.1f' % q) for q in threshold],
        axis=1,
    )
    .groupby('Short Name')
    .mean()
)

fig = px.bar(tmp,
    barmode='overlay',
    opacity=1,
)
fig.update_layout(
    legend_title='Reviewer Matches',
    bargap=0.1,
    xaxis_dtick=1,
    xaxis_type='category',
    xaxis_tickfont_size=8,
    yaxis_title='Count',
    **aspect(0.4)
)
fig.update_traces(
    hovertemplate='Keyword %{x} has %{y:.1f} matches %{fullData.name}<extra></extra>'
)
fig.show(config=config)
GeospatialNetworkHDModelsScalarImageVideoTabularTimeTextVector_TensorOtherDataNADataAlgorithmDataAbstrDatasetsDeploymentMethodologyApplicationGuidelinesInteractionWorkflowSystemsSoftwareSTARDomain_TaskTheoryVisDesignOtherContribCompSystemsLifeBioMLStatsModelScienceEngrSocHumOtherAppNAAppCollabColorStorytellingAnalyzeDecideGenPublicMixedInitPerceptionPersonalVisClusterAggDataMgmtDimRedFeaturesBigDataMLMathCompTopIsosurfacesFlowVolumesCompBenchmarkHumanQualHumanQuantMotionArtMapsChartsComparisonGraphicsMultiViewImageProcessingDisplaysMultiResHardwareUncertaintyOtherTopic050100
Reviewer Matches>= 0.5>= 0.7>= 0.9Short NameCount

Bidding

How many bids did individual PC members make?

Code

# Bid counts per (reviewer, bid category).
counts = bids.value_counts(['Reviewer', 'Bid'], sort=False)
tmp = counts.reset_index().rename(columns={0: 'Number of Bids'})

fig = px.bar(tmp,
    x='Reviewer',
    y='Number of Bids',
    color='Bid'
)
fig.update_layout(
    xaxis_type='category',
    xaxis_categoryorder='total descending',
    xaxis_showticklabels=False,
    **aspect(0.4)
)
fig.update_traces(
    hovertemplate='Reviewer %{x} made %{y} "%{fullData.name}" bids.<extra></extra>'
)
fig.show(config=config)
0100200300400
BidconflictreluctantwantwillingReviewerNumber of Bids

How many (positive) bids did each submission receive?

Code

# Positive ("want"/"willing") bid counts per submission.
counts = bids.value_counts(['Paper ID', 'Bid'], sort=False).reset_index()
positive = counts[counts['Bid'].isin(['want', 'willing'])]
tmp = positive.rename(columns={0: 'Number of Bids'})

fig = px.bar(tmp,
    x='Paper ID',
    y='Number of Bids',
    color='Bid'
)
fig.update_layout(
    xaxis_type='category',
    xaxis_categoryorder='total descending',
    xaxis_showticklabels=False,
    title='Positive Bids per Paper',
    **aspect(0.4),
)
fig.update_traces(
    hovertemplate='Paper %{x} received %{y} "%{fullData.name}" bids.<extra></extra>',
)
fig.show(config=config)
010203040
BidwantwillingPositive Bids per PaperPaper IDNumber of Bids

Code

# Papers with at least this many positive bids are highlighted.
popular = 15

# want/willing counts per paper, one column each.
pos = (bids
    .query('Bid in ["want", "willing"]')
    .value_counts(['Paper ID', 'Bid'], sort=False)
    .unstack()
    .fillna(0)
)

# One marker per distinct (want, willing) combination, sized by how
# many papers share that combination.
tmp = (pos
    .groupby(['want', 'willing'])
    .apply(lambda g: pd.Series({'ids': g.index.values, 'count': g.index.size}))
    .reset_index()
)
tmp['popular'] = np.where(
    tmp['willing'] + tmp['want'] >= popular,
    ">= %d" % popular,
    "< %d" % popular,
)

fig = px.scatter(tmp,
    x='willing',
    y='want',
    size='count',
    color='popular',
    custom_data=['count', 'ids'],
)
fig.update_layout(
    legend_title='Total Pos. Bids',
    title='Distribution of Positive Bids',
    **aspect(0.4)
)
fig.update_traces(
    hovertemplate='%{customdata[0]} papers received %{x} "willing" and %{y} "want" bids',
)
fig.show(config=config)
5101520253035051015
Total Pos. Bids< 15>= 15Distribution of Positive Bidswillingwant

Does the presence of specific keywords correlate with bidding?

We run a reviewer-independent ridge regression model where the dependent variable is the overall reviewer interest in a submission, and the independent variables are the (weighted) presences of its keywords. We measure interest by giving each “willing” bid a score of 1 and each “want” bid a score of 2:

Code

import scipy.linalg  # NOTE(review): unused in this cell; kept in case later cells rely on it
from sklearn.linear_model import Ridge

# Keyword -> column-index lookup for the design matrix.
tmp_3 = keywords.copy()
tmp_3['ix'] = np.arange(len(tmp_3))
tmp_3 = tmp_3[['Short Name', 'ix']]

tmp_1 = k_all[['Paper ID', 'Short Name']]
tmp_2 = bids[bids['Bid'].isin(['willing', 'want'])]

# One row per (paper, keyword, positive bid).
df = tmp_1.merge(tmp_3, on="Short Name").merge(tmp_2, on="Paper ID")
# Bid weights: 'want' counts 2, 'willing' counts 1.
df['weight'] = np.where(df['Bid'] == 'willing', 1, 2)

# Total bid weight per (paper, keyword). Every keyword of a paper pairs
# with all of that paper's bids, so the sum is the same for each keyword
# of a given paper.
total_weight = df[['Paper ID', 'ix', 'weight']].groupby(['Paper ID', 'ix']).sum().reset_index()

# Down-weight keywords on keyword-heavy submissions so each paper
# contributes equally regardless of how many keywords it lists.
keyword_count = tmp_1.groupby(['Paper ID']).count().reset_index()
keyword_count['Keyword Weight'] = 1.0 / keyword_count['Short Name']
total_weight = total_weight.merge(keyword_count[['Paper ID', 'Keyword Weight']], on="Paper ID")

nrows = max(total_weight['Paper ID']) + 1
ncols = max(total_weight['ix']) + 1

# Scatter the (paper, keyword) weights into a dense design matrix via
# NumPy fancy indexing (replaces the old per-row Python loop).
rows = total_weight['Paper ID'].to_numpy(dtype=int)
cols = total_weight['ix'].to_numpy(dtype=int)
design_matrix = np.zeros((nrows, ncols))
design_matrix[rows, cols] = total_weight['Keyword Weight']
rhs = np.zeros(nrows)
rhs[rows] = total_weight['weight']  # constant per paper, so last write per row is fine

# Ideally, we find the best regularizer by splitting into training/validation,
# but on inspection the order doesn't seem to change too much
lr = Ridge(1).fit(design_matrix, rhs)
tmp_3['Importance'] = lr.coef_
tmp_3 = tmp_3.sort_values(by=['Importance']).merge(keywords, on='Short Name')

px.scatter(tmp_3, 
    x="Short Name", 
    y="Importance", 
    color='Category',
    custom_data = ['Keyword'],
).update_layout(
    title = 'Keyword Importance for Bidding',
    xaxis_dtick = 1,
    xaxis_categoryorder = 'trace',
    xaxis_tickfont_size = 8,
    **aspect(0.4)
).update_traces(
    hovertemplate = 'Importance of "%{customdata[0]}": %{y}<extra></extra>'
).show(config=config)
WorkflowApplicationOtherContribDataAbstrTheoryDomain_TaskDeploymentSoftwareSTARVisDesignGuidelinesInteractionDatasetsMethodologyAlgorithmSystemsCompSystemsScienceEngrLifeBioOtherAppSocHumMLStatsModelNAAppComparisonColorCompTopMathGraphicsFeaturesImageProcessingCompBenchmarkPersonalVisDataMgmtMapsClusterAggOtherTopicArtCollabMultiViewHardwareAnalyzeDecideIsosurfacesMultiResChartsUncertaintyFlowPerceptionBigDataMLGenPublicHumanQuantMixedInitVolumesMotionDimRedHumanQualDisplaysStorytellingVector_TensorTextImageVideoOtherDataModelsNADataGeospatialTimeNetworkScalarTabularHD−10010
CategoryContribution ThemeApplication AreaTopicData TypeKeyword Importance for BiddingShort NameImportance

Assignment

How many papers were PC members assigned?

Code

# Number of assigned submissions per PC member.
tmp = assignments.value_counts(['Reviewer']).reset_index()

fig = px.histogram(tmp, x=0)
fig.update_traces(
    hovertemplate='%{y} reviewers were assigned %{x} submissions',
)
fig.update_layout(
    bargap=.1,
    xaxis_title='Number of Assignments',
    yaxis_title='Number of PC members',
    **aspect(0.4)
)
fig.show(config=config)
123456020406080100
Number of AssignmentsNumber of PC members

Code

# Number of assigned submissions per PC member, split by role.
tmp = assignments.value_counts(['Reviewer', 'Role']).reset_index()

fig = px.histogram(tmp, x=0, color='Role')
fig.update_traces(
    hovertemplate='%{y} reviewers were assigned %{x} submissions<extra></extra>'
)
fig.update_layout(
    bargap=.1,
    barmode='group',
    xaxis_title='Number of Assignments',
    yaxis_title='Number of PC Members',
    **aspect(0.4)
)
fig.show(config=config)
1234020406080
RolePrimarySecondaryNumber of AssignmentsNumber of PC Members

How many areas did reviewers review in?

Code

# Distinct areas among each reviewer's assigned submissions.
# (dropna=False keeps parity with len(unique()), which counts NaN.)
tmp = (assignments
    .merge(submissions, on='Paper ID')
    .groupby('Reviewer')
    .apply(lambda g: g['Area'].nunique(dropna=False))
    .reset_index())

fig = px.histogram(tmp, x=0)
fig.update_traces(
    hovertemplate='%{y} PC members were assigned submissions from %{x} area(s)',
)
fig.update_layout(
    bargap=.1,
    xaxis_title='Number of Areas',
    yaxis_title='Number of PC members',
    **aspect(0.4),
)
fig.show(config=config)
12345020406080
Number of AreasNumber of PC members

How do match scores correlate with bids?

Code

# Attach to each bid its reviewer-paper match score and the paper's area.
tmp = bids.assign(
    Score=[matchscores.at[rev, pid]
           for rev, pid in zip(bids['Reviewer'], bids['Paper ID'])],
    Area=[submissions.at[pid, 'Area'] for pid in bids['Paper ID']],
)

fig = px.box(tmp,
    x='Bid',
    y='Score',
    color='Bid',
)
fig.update_layout(
    showlegend=False,
    xaxis_categoryorder='array',
    xaxis_categoryarray=['want', 'willing', 'reluctant', 'conflict'],
    **aspect(0.4)
)
fig.update_traces(
    line_width=2,
    boxmean=True
)
fig.show(config=config)
wantwillingreluctantconflict−1−0.500.51
BidScore

Code

# Drop sentinel (-1) match scores before plotting.
valid = tmp['Score'] > -1.0
tmp2 = tmp[valid]

fig = px.violin(tmp2,
    x='Bid',
    y='Score',
    color='Area',
    box=True,
)
fig.update_layout(
    title='Match scores by bid by area',
    xaxis_categoryorder='array',
    xaxis_categoryarray=['want', 'willing', 'reluctant', 'conflict'],
    violingap=0.2,
    violingroupgap=0.1,
    **aspect(0.4)
)
fig.update_traces(
    box_line_color='black',
    box_line_width=1,
    line_width=0,
    meanline_visible=True,
    marker_size=4,
)
fig.show(config=config)
wantwillingreluctantconflict00.51
AreaTheoretical & EmpiricalApplicationsSystems & RenderingRepresentations & InteractionData TransformationsAnalytics & DecisionsMatch scores by bid by areaBidScore

How often were reviewers assigned submissions that they bid on?

Code

# Left-join assignments with bids; assignments without a matching bid
# fall into the explicit 'no bid' category.
tmp = (assignments
    .merge(bids, on=['Reviewer', 'Paper ID'], how='left')
    .fillna('no bid')
    .value_counts(['Role', 'Bid'])
    .rename('Reviewers')
    .reset_index()
)

px.bar(tmp,
    y='Reviewers',
    x='Role',
    color='Bid',
    custom_data=['Bid']
).update_traces(
    hovertemplate='%{y} PC members assigned as %{x} bid %{customdata}<extra></extra>',
).update_layout(
    title="Assignment by bidding",
    **aspect(0.4),
).show(config=config)
PrimarySecondary0100200300400
Bidwantwillingno bidreluctantAssignment by biddingRoleReviewers

Code

# Inject the Fira Sans webfonts used by the document's styling.
# Fixed import path: IPython.core.display is deprecated for direct use;
# the public API location is IPython.display.
from IPython.display import HTML
HTML("""
<style>
@font-face {
  font-family: 'Fira Sans';
  font-style: normal;
  font-weight: 400;
  src: url(https://fonts.gstatic.com/s/firasans/v11/va9E4kDNxMZdWfMOD5Vvl4jLazX3dA.woff2) format('woff2');
  unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+2000-206F, U+2074, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD;
}

@font-face {
  font-family: 'Fira Sans Medium';
  font-style: normal;
  font-weight: 500;
  font-display: swap;
  src: url(https://fonts.gstatic.com/s/firasans/v11/va9B4kDNxMZdWfMOD5VnZKveRhf6Xl7Glw.woff2) format('woff2');
  unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+2000-206F, U+2074, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD;
}</style>
""")